knitr::opts_chunk$set(echo = TRUE, message=FALSE)
Task design and analysis is a collaborative effort between Kuppuraj, Paul Thompson, Mihaela Duta and Dorothy Bishop.
Analysis of data from task version 5, August 2017.
The sequence of triplets used in this task is as follows for each set:
1, A1, S1, B1; %Adjacent deterministic
2, A1, S1, B1;
3, C1, S2, D1; %Adjacent probabilistic
4, C1, S2, D2;
5, E1, R, F1;%Non-adjacent deterministic. R is random
6, E1, R, F1;
7, R, R, R; %Totally random
8, R, R, R;
This code is used in the Matlab program used to generate stimulus sequences, which is kuppuseqgen_2A2C2E2R_seqgen.m
This uses stimulus specifications from: ‘all_stimuli_routines_3cond_kup_final.xlsx’ which is a giant matrix listing all word stimuli, and documenting where there are ‘conflicts’, such that two stimuli should not co-occur in the same array - either because of the same first phoneme, or because of visual confusability.
The matlab program also checks that two triplets with the same initial two elements do not occur in succession.
Terminology when referring to the experiment:
Current version of analysis in R is ‘mixed effects regression discontinuity_V5.R’ Useful references for linear mixed effects modeling are: http://bbolker.github.io/mixedmodels-misc/glmmFAQ.html
https://rstudio-pubs-static.s3.amazonaws.com/33653_57fc7b8e5d484c909b615d8633c01d51.html
https://stats.stackexchange.com/questions/242109/model-failed-to-converge-warning-in-lmer
Initial steps: may need to install packages.
#setwd("c:/Users/pthompson/Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5")
#directory where data are found - currently set to Mac
options(scipen=999) #avoid scientific format
#library(devtools)
#install_github(repo = "RDDtools", username = "MatthieuStigler", subdir = "RDDtools")
library(RDDtools)
library(optimx)
library(lme4)
library(ggplot2)
library(doBy)
library(gridExtra)
library(tidyverse)
library(RColorBrewer)
library(splines)
#substitute windows for quartz if nonmac
# On Windows, alias quartz() to windows() so that later quartz() calls open a
# graphics device on either platform (on Mac the built-in quartz() is used)
if(.Platform$OS.type=="windows") {
quartz<-function() windows()
}
# Experiment structure: 10 blocks of 5 sets each; sequences are broken
# ("break" phase) during blocks 7 and 8.
breakblock <- c(7, 8)
nsets <- 5
nblocks <- 10
# Phase boundaries expressed in set units:
# phase 1 = learning (sets 1-30), phase 2 = break (31-40), phase 3 = final (41-50)
phase1.start <- 1
phase1.end <- nsets * (breakblock[1] - 1)
phase2.start <- phase1.end + 1
phase2.end <- nsets * breakblock[2]
phase3.start <- phase2.end + 1
phase3.end <- nsets * nblocks
phase1.range <- phase1.start:phase1.end
phase2.range <- phase2.start:phase2.end
phase3.range <- phase3.start:phase3.end
# Participant identifiers: data files are <name>.csv
namelist <- paste0("pilot_0", 1:9)
nsubs <- length(namelist)
# Initialise data frames for holding main data and regression coefficients
# for individual participants
main.data <- data.frame(ID = factor(), SetInd = integer(),
                        Type = integer(), TargetRT = integer())
thistype <- c("rand", "Adj_D", "Adj_P", "Non_D")
# Placeholder coefficient table (re-initialised with its final shape later)
lmsummarycoefs <- data.frame(matrix(NA, ncol = nsubs * 25, nrow = nsubs))
Now process each person’s data and add to main summary file
# Loop over participants: read each CSV, clean RTs, and summarise to one
# (minimum) RT per set x Type, stacking results into main.data in long form.
for (i in 1:nsubs){
myname=namelist[i]
# NB the Mac path on the next-but-one line overwrites this Windows path;
# comment out whichever does not apply
mycsv=paste("c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
mycsv=paste("/Users/dorothybishop//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
mydata= read.csv(mycsv) # read csv file
#get rid of RTs of inaccurate responses:replace with NA.
Rwdata=mydata
# Positional column selection: assumed to be ID, SetInd, Type, TargetRT —
# TODO confirm against the csv layout if the export format changes
rawdata=Rwdata[ ,c(1,10,12,26)]
RTcutoff<-2000
##############
rawdata$TargetRT[rawdata$TargetACC==0]<-NA
rawdata$TargetRT[rawdata$TargetRT<(-199)]<-NA #include anticipations up to -200
rawdata$TargetRT[rawdata$TargetRT>RTcutoff]<-RTcutoff #set long RTs to the RTcutoff value
RWdata<-rawdata
#rename the types
# Each condition has two triplet codes (see sequence listing in the header);
# both collapse to one label per condition
RWdata$Type[RWdata$Type==1]<- "Adj_D"
RWdata$Type[RWdata$Type==2]<- "Adj_D"
RWdata$Type[RWdata$Type==3]<- "Adj_P"
RWdata$Type[RWdata$Type==4]<- "Adj_P"
RWdata$Type[RWdata$Type==5]<- "Non_D"
RWdata$Type[RWdata$Type==6]<- "Non_D"
RWdata$Type[RWdata$Type==7]<- "rand"
RWdata$Type[RWdata$Type==8]<- "rand"
RWdata$Type<-as.factor(RWdata$Type)
RWdata$Type<-factor(RWdata$Type,levels=c("rand", "Adj_D", "Adj_P", "Non_D"))
#ensure random is the first in the list, as this will be baseline comparison in analysis
#Create a new matrix that has summary data for this participant.
detaildata<- summaryBy(TargetRT ~ SetInd+Type, data=RWdata,
FUN=c(min), na.rm=TRUE)
#NB only 2 points to consider, so now taking minimum, not median
# RWdata$ID[4] picks the ID from an arbitrary row — assumes a single
# participant per file, so every row shares one ID
detaildata$ID<-rep(RWdata$ID[4],length(detaildata[,1]))
#Need to repeat the subject ID on each line
names(detaildata)<-c("SetInd", "Type", "TargetRT", "ID") #column headings
main.data<-rbind(main.data,detaildata) #add to main data in long form with participants stacked below each other
}
The package RDDtools makes it easy to compare the slopes of two portions of data. We do this for phase 1 (learning) and phase 2 (break-sequence). The t-value for this comparison can be used as an index of learning. The outputs are written to a .csv file for inspection.
# (A stray bare `library()` call was removed here: called with no argument it
# just pops up the installed-packages index rather than loading anything.)
#add coeffs for reg discontinuity, just for learning/random phases
# 6 columns per triplet type: 4 regression coefficients + t and p for the
# slope-difference test
lmsummarycoefs<-data.frame(matrix(NA,24,nrow=nsubs))#initialise matrix
for (i in 1:nsubs){
  startcol<- -5 #set so the first pass lands on column 1 (historical offset)
  for (mytype in 1:4){
    # One participant, one triplet type, learning + break phases only (sets < 41)
    mytemp<-filter(main.data,ID==namelist[i],Type==thistype[mytype],SetInd<41)
    # Regression discontinuity (RDDtools) with the cutpoint at the first
    # break-phase set
    RDD.temp<-RDDdata(y=mytemp$TargetRT,x=mytemp$SetInd,cutpoint=31)
    reg_para <- RDDreg_lm(RDDobject = RDD.temp, order = 1) #this is just linear: for higher order can increase
    startcol<-startcol+6
    endcol<-startcol+3
    lmsummarycoefs[i,startcol:endcol]<-reg_para$coefficients
    st<-summary(reg_para)[[4]]
    myt<-st[2,3]#t-value corresponding to difference in slope for two phases
    lmsummarycoefs[i,endcol+1]<-round(myt,2)
    myp<-summary(reg_para)$coefficients[2,4]#sig of slope diff for the two phases
    lmsummarycoefs[i,endcol+2]<-round(myp,4)
    colnames(lmsummarycoefs)[startcol:endcol]<-paste0(thistype[mytype],'.',names(reg_para$coefficients))
    colnames(lmsummarycoefs)[endcol+1]<-paste0(thistype[mytype],'_t.diff')
    colnames(lmsummarycoefs)[endcol+2]<-paste0(thistype[mytype],'_p.diff')
  }
}
#write.csv(lmsummarycoefs, file = paste0(datadir,"lm_discont_coeffs_linear.csv"))
# Keep just the t and p columns for each of the four types, one row per participant
shortbit<-select(lmsummarycoefs,5,6,11,12,17,18,23,24) #just cols for t-values and p
rownames(shortbit)<-namelist
shortbit
## rand_t.diff rand_p.diff Adj_D_t.diff Adj_D_p.diff Adj_P_t.diff
## pilot_01 -0.38 0.7054 3.08 0.0040 1.39
## pilot_02 -1.81 0.0785 1.80 0.0796 -1.50
## pilot_03 -0.17 0.8680 3.28 0.0023 1.23
## pilot_04 -0.04 0.9654 8.23 0.0000 6.06
## pilot_05 0.67 0.5068 6.79 0.0000 5.08
## pilot_06 -0.97 0.3406 -0.86 0.3969 3.27
## pilot_07 -0.89 0.3777 4.74 0.0000 3.54
## pilot_08 1.87 0.0700 0.89 0.3814 1.35
## pilot_09 -0.32 0.7524 2.89 0.0065 4.01
## Adj_P_p.diff Non_D_t.diff Non_D_p.diff
## pilot_01 0.1737 0.52 0.6058
## pilot_02 0.1421 0.04 0.9711
## pilot_03 0.2271 1.89 0.0670
## pilot_04 0.0000 5.11 0.0000
## pilot_05 0.0000 4.20 0.0002
## pilot_06 0.0024 0.02 0.9816
## pilot_07 0.0011 10.31 0.0000
## pilot_08 0.1866 0.29 0.7711
## pilot_09 0.0003 1.53 0.1346
#can view shortbit to see which participants show which effects
# Modifications to main.data to create factors etc
main.data$ID <- as.factor(main.data$ID)
# Offset of 200 keeps the log argument positive for anticipations down to -200 ms
main.data$log_TargetRT<-log(main.data$TargetRT+200) #to allow for anticipatory responses
main.data<-data.frame(main.data)
#png(filename="c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/Grandavg.png")
# Grand-average RT per set and Type across all participants, plotted as lines
summarytable<-summaryBy(TargetRT~SetInd+Type,data=main.data,FUN=c(mean),na.rm=TRUE)
ggplot(summarytable,aes(x = SetInd, y = TargetRT.mean,color=Type)) +
geom_line()
# dev.off()
This allows us to see how well the regression discontinuity value identifies those who seem to be learning. The horizontal grey lines denote the break-seq blocks. (NB I can’t currently get this to incorporate the plot in output except by writing to file then reading back)
#png(filename="c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/Indivdata.png",width = 1000, height = 300)
#quartz(width=12,height=3)
# Per-participant RT trajectories by Type; dashed grey verticals at sets 30
# and 40 bracket the break-sequence phase
ggplot(main.data, aes(x = SetInd, y = TargetRT,color=Type)) +
geom_line(alpha=0.75) +
geom_vline(aes(xintercept = 30), color = 'grey', size = 1, linetype = 'dashed') +
geom_vline(aes(xintercept = 40), color = 'grey', size = 1, linetype = 'dashed') +
theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
theme(legend.position = "top",strip.text=element_text(size=14),axis.text=element_text(size=14),axis.title=element_text(size=14,face="bold"))
#dev.off()
#create new variables
main.data$ID <- as.factor(main.data$ID)
#main.data$log_TargetRT<-log(main.data$TargetRT+200) #to allow for anticipatory responses
main.data<-data.frame(main.data)
#####################################################################################
#create new variables: 0/1 indicators for the three phases of the experiment
main.data$phase1<-ifelse(main.data$SetInd %in% phase1.range,1,0)
main.data$phase2<-ifelse(main.data$SetInd %in% phase2.range,1,0) #block where seq broken
main.data$phase3<-ifelse(main.data$SetInd %in% phase3.range,1,0) #block where seq restored
main.data$p123<-as.factor(main.data$phase1+2*main.data$phase2+3*main.data$phase3)
levels(main.data$p123)<-c('Learn','Break','Final')
main.data$p123<-factor(main.data$p123,levels=c("Break","Learn","Final"))#reorder factor levels
# Breakpoints: first set of the Break phase and first set of the Final phase
myseq<-seq(1,nblocks*nsets)
bp1<-myseq[phase1.end]+1
bp2<-myseq[phase2.end]+1
#functions to determine if value x is in phase1, phase2 or phase3
#Each returns the set index rescaled to increment within its own phase
#(sets outside the phase map to zero)
b1make <- function(x, bp1) ifelse(x < bp1, x, 0) #NB altered by DB from bp1-x
b2make <- function(x, bp1, bp2) ifelse(x >= bp1 & x < bp2, x - bp1+1, 0)
b3make <- function(x, bp2) ifelse(x < bp2, 0, x - bp2+1)
#add centred b1, b2, b3 to main.data rather than computing during regression
#(also helps understand these functions by seeing how they relate to set)
main.data$b1<-b1make(main.data$SetInd,bp1)-mean(b1make(main.data$SetInd,bp1))
main.data$b2<-b2make(main.data$SetInd,bp1,bp2)-mean(b2make(main.data$SetInd,bp1,bp2))
# BUGFIX: b3 was previously centred by subtracting mean(b2make(...)); it now
# subtracts its own mean, consistent with b1 and b2
main.data$b3<-b3make(main.data$SetInd,bp2)-mean(b3make(main.data$SetInd,bp2))
# allb: the phase-specific (centred) set index for whichever phase a row is in.
# BUGFIX: this section originally ran BEFORE b1/b2/b3 were created and seeded
# allb from main.data$SetInd_c, a column never defined in this script (so the
# Break/Final assignments errored on a zero-length replacement). It now runs
# after b1/b2/b3 exist and seeds allb from b1, the Learn-phase predictor —
# TODO confirm this matches the originally intended SetInd_c centring.
main.data$allb<-main.data$b1
w<-which(main.data$p123=='Break')
main.data$allb[w]<-main.data$b2[w]
w2<-which(main.data$p123=='Final')
main.data$allb[w2]<-main.data$b3[w2]
# Restrict to the learning phase only
vshort.data<-main.data[which(main.data$p123=='Learn'),]
# Mixed model: fixed effect of Type; random slopes on b1 (centred set index)
# by participant and by participant-within-Type; the 0+ terms suppress
# random intercepts
mod.3vc <- lmer(TargetRT ~ Type #nb will automatically do main effects as well
+(0+b1|ID)+(0+b1|ID:Type), data = vshort.data,
REML = TRUE,control = lmerControl(optimizer = "optimx",
calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
saveRDS(mod.3vc, "mod.3vc.rds") # cache the fitted model to disk
#saveRDS(mod.3vb, "mod.3vb.rds")
#This fits with no warning messages.
summary(mod.3vc)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type + (0 + b1 | ID) + (0 + b1 | ID:Type)
## Data: vshort.data
## Control:
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
##
## REML criterion at convergence: 14085.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2957 -0.5579 0.0468 0.6194 4.2569
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID:Type b1 60.46 7.776
## ID b1 127.22 11.279
## Residual 25385.20 159.327
## Number of obs: 1080, groups: ID:Type, 36; ID, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 654.23 11.68 56.03
## TypeAdj_D -243.46 16.41 -14.83
## TypeAdj_P -182.67 16.41 -11.13
## TypeNon_D -74.24 16.41 -4.52
##
## Correlation of Fixed Effects:
## (Intr) TypA_D TypA_P
## TypeAdj_D -0.703
## TypeAdj_P -0.703 0.500
## TypeNon_D -0.703 0.500 0.500
# Random-effect estimates (b1 slopes) for each ID and each ID:Type combination
ranef(mod.3vc)
## $`ID:Type`
## b1
## pilot_01:rand -0.6734562
## pilot_01:Adj_D 2.6008124
## pilot_01:Adj_P 1.6675079
## pilot_01:Non_D -1.9574593
## pilot_02:rand -3.2893863
## pilot_02:Adj_D 2.7328078
## pilot_02:Adj_P 3.1780465
## pilot_02:Non_D -1.7294831
## pilot_03:rand 7.2011086
## pilot_03:Adj_D -8.8658447
## pilot_03:Adj_P -4.3289490
## pilot_03:Non_D 2.7443989
## pilot_04:rand 11.9325441
## pilot_04:Adj_D -9.4900449
## pilot_04:Adj_P -4.2592186
## pilot_04:Non_D -7.8037263
## pilot_05:rand 10.0261229
## pilot_05:Adj_D -9.6395486
## pilot_05:Adj_P -5.1360250
## pilot_05:Non_D -2.0129672
## pilot_06:rand -3.7780837
## pilot_06:Adj_D 5.9965759
## pilot_06:Adj_P -0.4057772
## pilot_06:Non_D -1.5763888
## pilot_07:rand 12.9632696
## pilot_07:Adj_D -3.6625555
## pilot_07:Adj_P 0.1172250
## pilot_07:Non_D -17.9932035
## pilot_08:rand -4.1900310
## pilot_08:Adj_D 1.7704182
## pilot_08:Adj_P 3.7041056
## pilot_08:Non_D -1.3017082
## pilot_09:rand 8.6710378
## pilot_09:Adj_D -8.2184924
## pilot_09:Adj_P -4.5116856
## pilot_09:Non_D 2.6166806
##
## $ID
## b1
## pilot_01 3.44533393
## pilot_02 1.87686401
## pilot_03 -6.83696300
## pilot_04 -20.24279382
## pilot_05 -14.22909458
## pilot_06 0.49726423
## pilot_07 -18.04358282
## pilot_08 -0.03622373
## pilot_09 -3.03514134
# Regression diagnostics for mod.3vc
par(mfrow=c(1,2))
plot(mod.3vc,type=c("p","smooth")) # residuals vs fitted
plot(mod.3vc,sqrt(abs(resid(.)))~fitted(.),type=c("p","smooth"),ylab=expression(sqrt(abs(resid)))) # scale-location plot
plot(mod.3vc,resid(.,type="pearson")~b1, type=c("p","smooth")) # residuals vs predictor
#dev.off()
#quartz()
#png(filename="merplot3.png")
# Normality of residuals
qqnorm(resid(mod.3vc))
qqline(resid(mod.3vc)) # shape due to using whole numbers 1:40.
hist(resid(mod.3vc),100)
# Grid of all Type x b1 x ID combinations for generating model predictions
newdat1<-expand.grid(Type=unique(vshort.data$Type),b1=unique(vshort.data$b1),ID=unique(vshort.data$ID))
library(RColorBrewer)
library(ggplot2)
# Observed points overlaid with fitted lines from mod.3vc, per participant
ggplot(vshort.data, aes(x = b1, y = TargetRT,color=Type)) +
geom_point(alpha=0.35) +
geom_line(data=newdat1,aes(y=predict(mod.3vc,newdata=newdat1)),size = .75)+
theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
theme(legend.position = "top",strip.text=element_text(size=12),axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))
library(lme4)
library(plyr)
# Power-style check: simulate data for N participants from the parameter
# estimates of mod.3vc, then refit the same model to the simulated data
N=50
nsim<-1
expdat_rd<-expand.grid(Type=factor(1:4),b1=unique(vshort.data$b1),ID=factor(1:N))
#Recode into familiar factors for Kuppu/DB hypotheses.
expdat_rd$Type<-car::recode(expdat_rd$Type,"1='Non_D';2='Adj_D';3='Adj_P';4='rand'")
expdat_rd$Type<-factor(expdat_rd$Type,levels=c("rand","Adj_D", "Adj_P", "Non_D"))
#Extract parameters for simulating data. (artificially reduce the effects of setInd and Type to see if power is working. Are the estimates reasonable)
newparams_rd <- list(
beta = getME(mod.3vc,"beta"), #fixed effects (intercept, Block, TPPR)
theta = getME(mod.3vc, "theta"),#Random slope
sigma = getME(mod.3vc, "sigma")) # residual standard deviation
# Simulate: re.form=~0 simulates new random effects for the new (simulated) IDs
ss <- simulate(~ Type+(0+b1|ID)+(0+b1|ID:Type), nsim = nsim, newdata = expdat_rd, newparams = newparams_rd, family=gaussian, re.form=~0, allow.new.levels=TRUE)
#####################################################################
expdat_rd$TargetRT <- ss[, 1] # first (only) simulated response vector
# Refit the original model structure to the simulated data
new1<- lmer(TargetRT ~ Type+(0+b1|ID)+(0+b1|ID:Type), data = expdat_rd, REML = TRUE,control = lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
summary(new1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type + (0 + b1 | ID) + (0 + b1 | ID:Type)
## Data: expdat_rd
## Control:
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
##
## REML criterion at convergence: 78373.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5506 -0.6627 -0.0066 0.6725 3.2753
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID:Type b1 71.48 8.454
## ID b1 157.87 12.564
## Residual 25112.36 158.469
## Number of obs: 6000, groups: ID:Type, 200; ID, 50
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 656.709 4.942 132.87
## TypeAdj_D -233.627 6.953 -33.60
## TypeAdj_P -186.484 6.953 -26.82
## TypeNon_D -77.128 6.953 -11.09
##
## Correlation of Fixed Effects:
## (Intr) TypA_D TypA_P
## TypeAdj_D -0.703
## TypeAdj_P -0.703 0.500
## TypeNon_D -0.703 0.500 0.500
# Flag implausibly large standard errors in the refit of the simulated data.
# BUGFIX: this used scalar ifelse(test, print(...), print(...)), which both
# printed AND auto-printed the returned value, so the message appeared twice.
if (any(round(sqrt(diag(vcov(new1))), 3) > 20)) {
  print("SE: too large")
} else {
  print("SE: OK")
}
## [1] "SE: OK"
## [1] "SE: OK"
The model has been extended to allow a regression discontinuity with random slopes and interactions between slope and Type
##########################################################################
# Re-establish the design constants for the two-phase (Learn vs Break) analysis
breakblock <- c(7, 8)   # blocks in which the sequences are broken
nsets <- 5              # sets per block
nblocks <- 10
# Phase boundaries in set units: Learn = 1-30, Break = 31-40, Final = 41-50
phase1.start <- 1
phase1.end <- nsets * (breakblock[1] - 1)
phase2.start <- phase1.end + 1
phase2.end <- nsets * breakblock[2]
phase3.start <- phase2.end + 1
phase3.end <- nsets * nblocks
phase1.range <- phase1.start:phase1.end
phase2.range <- phase2.start:phase2.end
phase3.range <- phase3.start:phase3.end
# On Windows, alias quartz() to windows() so plotting code works cross-platform
if (.Platform$OS.type == "windows") {
  quartz <- function() windows()
}
##########################################################################
# Participants and condition labels (rand first: baseline for contrasts)
namelist <- paste0("pilot_0", 1:9)
nsubs <- length(namelist)
thistype <- c("rand", "Adj_D", "Adj_P", "Non_D")
# Empty long-form container, refilled by the participant loop below
main.data <- data.frame(ID = factor(), SetInd = integer(),
                        Type = integer(), TargetRT = integer())
##########################################################################
# Loop over participants: read each CSV, clean RTs, and summarise to one
# (minimum) RT per set x Type, stacking results into main.data in long form.
for (i in 1:nsubs){
  myname=namelist[i]
  # NB the Mac path on the next line overwrites this Windows path;
  # comment out whichever does not apply
  mycsv=paste("c:/Users/pthompson//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
  mycsv=paste("/Users/dorothybishop//Dropbox/SL and WM paper/Revision/Pilot_Data_Revised task/V5/",myname,".csv",sep="")
  mydata= read.csv(mycsv) # read csv file
  #get rid of RTs of inaccurate responses and any RT greater than 3000 ms:replace with NA.
  Rwdata=mydata
  # Positional column selection: assumed ID, SetInd, Type, TargetRT —
  # TODO confirm against the csv layout if the export format changes
  rawdata=Rwdata[ ,c(1,10,12,26)]
  #in the original analysis the following section will be removed
  ####### this bit caps RTs above 3000 ms at 2999 so those trials are not
  ####### eliminated in the next section, and at least an RT can be extracted
  rawdata$TargetRT[rawdata$Type==19 & rawdata$TargetRT>3000]<-2999
  ##############
  rawdata$TargetRT[rawdata$TargetACC==0]<-NA
  # BUGFIX: this previously read rawdata$TargetRT[rawdata$TargetRT<-199]<-NA,
  # which R parses as an ASSIGNMENT (TargetRT <- 199) inside the subscript,
  # overwriting the whole column with 199 and then NA-ing element 199.
  # Parenthesising the negative literal restores the intended comparison
  # (matching the first participant loop above).
  rawdata$TargetRT[rawdata$TargetRT < (-199)]<-NA #include anticipations up to -200
  rawdata$TargetRT[rawdata$TargetRT>2000]<-2000 #cap long RTs at 2000 ms
  RWdata<-rawdata
  #rename the types so median can be taken for each block
  # Each condition has two triplet codes; both collapse to one label
  RWdata$Type[RWdata$Type==1]<- "Adj_D"
  RWdata$Type[RWdata$Type==2]<- "Adj_D"
  RWdata$Type[RWdata$Type==3]<- "Adj_P"
  RWdata$Type[RWdata$Type==4]<- "Adj_P"
  RWdata$Type[RWdata$Type==5]<- "Non_D"
  RWdata$Type[RWdata$Type==6]<- "Non_D"
  RWdata$Type[RWdata$Type==7]<- "rand"
  RWdata$Type[RWdata$Type==8]<- "rand"
  #RWdata$ID<-substring(RWdata$ID,1,2)
  RWdata$Type<-as.factor(RWdata$Type)
  RWdata$Type<-factor(RWdata$Type,levels=c("rand", "Adj_D", "Adj_P", "Non_D"))
  # One (minimum) RT per set x Type for this participant
  detaildata<- summaryBy(TargetRT ~ SetInd+Type, data=RWdata,
  FUN=c(min), na.rm=TRUE)
  #NB only 2 points to consider, so now taking minimum, not median
  # ah, but NB for Adj_Prob there are 4 points...
  # RWdata$ID[4] picks the ID from an arbitrary row — assumes one
  # participant per file, so every row shares one ID
  detaildata$ID<-rep(RWdata$ID[4],length(detaildata[,1]))
  names(detaildata)<-c("SetInd", "Type", "TargetRT", "ID")
  main.data<-rbind(main.data,detaildata) #add to main data file in long form
}
main.data$ID <- as.factor(main.data$ID)
#main.data$log_TargetRT<-log(main.data$TargetRT+200) #to allow for anticipatory responses
main.data<-data.frame(main.data)
#create new variables
# Two-level phase factor: sets in phase1.range are 'Learn' (coded 1),
# everything else 'Break' (coded 0); levels() relabels 0->Break, 1->Learn
main.data$phase<-ifelse(main.data$SetInd %in% phase1.range,1,0)
main.data$phase<-as.factor(main.data$phase)
levels(main.data$phase)<-c('Break','Learn')
#redo without centring at this point
myseq<-seq(1,nblocks*nsets)
bp1<-myseq[phase1.end]+1 # first set of the Break phase
#functions to determine if value x is in phase1, phase2 or phase3
#So this ignores values for sets not in this phase (sets to zero), but has all other phases scaled
#so that they show increment across phase
# NB unlike the earlier section, b1 here counts DOWN towards the breakpoint
b1make <- function(x, bp1) ifelse(x < bp1, bp1-x, 0)
b2make <- function(x, bp1) ifelse(x >= bp1, x - bp1+1, 0)
main.data$b1<-b1make(main.data$SetInd,bp1)
main.data$b2<-b2make(main.data$SetInd,bp1)
##########################################################################################
# Restrict to Learn + Break phases (sets 1-40)
twophase.data<-main.data[main.data$SetInd<=40,]
# Discontinuity model: Type x phase step change plus Type-specific post-break
# slope (b2), with random b1 slopes by ID:Type and ID (no random intercepts).
# NB the trailing '+' before '+(0+b1...' produces a harmless unary plus.
mod.new <- lmer(TargetRT ~ Type*phase + b2*Type+
+(0+b1|ID:Type)+(0+b1|ID), data = twophase.data,
REML = TRUE,control = lmerControl(optimizer = "optimx",
calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
##########################################################################################
summary(mod.new)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type * phase + b2 * Type + +(0 + b1 | ID:Type) + (0 +
## b1 | ID)
## Data: twophase.data
## Control:
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
##
## REML criterion at convergence: 18848.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3386 -0.5490 0.0427 0.6136 4.4484
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID:Type b1 22.55 4.749
## ID b1 55.67 7.462
## Residual 28260.16 168.108
## Number of obs: 1440, groups: ID:Type, 36; ID, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 612.31852 38.27978 15.996
## TypeAdj_D -66.99259 54.13578 -1.237
## TypeAdj_P -23.28148 54.13578 -0.430
## TypeNon_D 4.68148 54.13578 0.086
## phaseLearn -15.65979 42.56128 -0.368
## b2 -1.34478 6.16935 -0.218
## TypeAdj_D:phaseLearn -299.51708 59.75815 -5.012
## TypeAdj_P:phaseLearn -257.12820 59.75815 -4.303
## TypeNon_D:phaseLearn -214.75811 59.75815 -3.594
## TypeAdj_D:b2 5.80875 8.72477 0.666
## TypeAdj_P:b2 1.25522 8.72477 0.144
## TypeNon_D:b2 0.04377 8.72477 0.005
##
## Correlation of Fixed Effects:
## (Intr) TypA_D TypA_P TypN_D phsLrn b2 TA_D:L TA_P:L TN_D:L
## TypeAdj_D -0.707
## TypeAdj_P -0.707 0.500
## TypeNon_D -0.707 0.500 0.500
## phaseLearn -0.899 0.636 0.636 0.636
## b2 -0.886 0.627 0.627 0.627 0.797
## TypAdj_D:pL 0.641 -0.906 -0.453 -0.453 -0.702 -0.568
## TypAdj_P:pL 0.641 -0.453 -0.906 -0.453 -0.702 -0.568 0.500
## TypNn_D:phL 0.641 -0.453 -0.453 -0.906 -0.702 -0.568 0.500 0.500
## TypAdj_D:b2 0.627 -0.886 -0.443 -0.443 -0.564 -0.707 0.803 0.402 0.402
## TypAdj_P:b2 0.627 -0.443 -0.886 -0.443 -0.564 -0.707 0.402 0.803 0.402
## TypeNn_D:b2 0.627 -0.443 -0.443 -0.886 -0.564 -0.707 0.402 0.402 0.803
## TA_D:2 TA_P:2
## TypeAdj_D
## TypeAdj_P
## TypeNon_D
## phaseLearn
## b2
## TypAdj_D:pL
## TypAdj_P:pL
## TypNn_D:phL
## TypAdj_D:b2
## TypAdj_P:b2 0.500
## TypeNn_D:b2 0.500 0.500
# Random-effect estimates (b1 slopes) for each ID and each ID:Type combination
ranef(mod.new)
## $`ID:Type`
## b1
## pilot_01:rand -4.61489713
## pilot_01:Adj_D 6.95328761
## pilot_01:Adj_P 0.75824069
## pilot_01:Non_D 1.59650517
## pilot_02:rand -5.18904347
## pilot_02:Adj_D 5.36569239
## pilot_02:Adj_P 1.95828364
## pilot_02:Non_D 1.68765647
## pilot_03:rand -0.99623407
## pilot_03:Adj_D 4.68485109
## pilot_03:Adj_P -2.73180705
## pilot_03:Non_D 1.35179123
## pilot_04:rand -3.62296921
## pilot_04:Adj_D 2.05391348
## pilot_04:Adj_P 0.23821344
## pilot_04:Non_D 2.42985991
## pilot_05:rand -0.50401251
## pilot_05:Adj_D -2.89764584
## pilot_05:Adj_P -1.93261844
## pilot_05:Non_D 6.35905275
## pilot_06:rand -6.35259815
## pilot_06:Adj_D 6.84156878
## pilot_06:Adj_P 1.14316038
## pilot_06:Non_D 1.69341561
## pilot_07:rand -0.33981282
## pilot_07:Adj_D -3.75950245
## pilot_07:Adj_P 4.28323087
## pilot_07:Non_D -0.03530841
## pilot_08:rand -6.41491353
## pilot_08:Adj_D 5.40411196
## pilot_08:Adj_P 3.49418321
## pilot_08:Non_D 1.28276219
## pilot_09:rand -0.39248567
## pilot_09:Adj_D -6.11923772
## pilot_09:Adj_P 1.57637161
## pilot_09:Non_D 6.92920910
##
## $ID
## b1
## pilot_01 11.5848714
## pilot_02 9.4359505
## pilot_03 5.6987153
## pilot_04 2.7128932
## pilot_05 2.5296298
## pilot_06 8.2090157
## pilot_07 0.3668325
## pilot_08 9.2966172
## pilot_09 4.9217792
#regression diagnostics
#quartz()
# png(filename="merplot2.png")
par(mfrow=c(1,2))
plot(mod.new,type=c("p","smooth")) # residuals vs fitted
plot(mod.new,sqrt(abs(resid(.)))~fitted(.),
type=c("p","smooth"),ylab=expression(sqrt(abs(resid)))) # scale-location plot
#plot(mod.new,resid(.,type="pearson")~twophase.data$SetInd, type=c("p","smooth"))
#dev.off()
#quartz()
#png(filename="merplot3.png")
# Normality of residuals
qqnorm(resid(mod.new))
qqline(resid(mod.new)) # shape due to using whole numbers 1:40.
hist(resid(mod.new),100)
#dev.off()
###########################################################################################
# Prediction grids split at the discontinuity: newdat1d covers the Learn
# phase (sets < 31), newdat2d the Break phase (31-40), so fitted lines are
# drawn separately on either side of the breakpoint
startdat<-select(twophase.data,ID,Type,SetInd,phase,b1,b2)
startdat$SetInd<-as.integer(startdat$SetInd)
newdat1d<-startdat[startdat$SetInd<31,]
newdat2d<-startdat[startdat$SetInd>30,]
newdat2d<-newdat2d[newdat2d$SetInd<41,]
#plots for lmer
ggplot(twophase.data, aes(x = SetInd, y = TargetRT,color=Type)) +
geom_point(alpha=0.35) +
geom_vline(aes(xintercept = bp1), color = 'grey', size = 1, linetype = 'dashed') +
geom_line(data=newdat1d,aes(y=predict(mod.new,newdata=newdat1d)),size = .75)+
geom_line(data=newdat2d,aes(y=predict(mod.new,newdata=newdat2d)),size = .75)+
theme_bw()+facet_wrap(~ID)+ scale_fill_brewer(palette="Set1")+
theme(legend.position = "top",strip.text=element_text(size=12),axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))
library(lme4)
library(plyr)
# Power-style check for the discontinuity model: simulate N participants from
# the parameter estimates of mod.new, then refit the same model
N=40
nsim<-1
expdat_rd<-expand.grid(Type=factor(1:4),setInd=1:40,ID=factor(1:N))
#Recode into familiar factors for Kuppu/DB hypotheses.
expdat_rd$Type<-car::recode(expdat_rd$Type,"1='Non_D';2='Adj_D';3='Adj_P';4='rand'")
expdat_rd$Type<-factor(expdat_rd$Type,levels=c("rand","Adj_D", "Adj_P", "Non_D"))
myseq<-seq(1,nblocks*nsets)
bp1<-myseq[phase1.end]+1
# Same phase-specific counters as used to build main.data above
b1make <- function(x, bp1) ifelse(x < bp1, bp1-x, 0)
b2make <- function(x, bp1) ifelse(x >= bp1, x - bp1+1, 0)
expdat_rd$b1<-b1make(expdat_rd$setInd,bp1)
expdat_rd$b2<-b2make(expdat_rd$setInd,bp1)
# NOTE(review): phase is numeric 0/1 here whereas mod.new used a factor —
# assumed equivalent because 1 maps onto the 'Learn' dummy; confirm coding
expdat_rd$phase <- ifelse(expdat_rd$setInd < bp1, 1, 0)
#Extract parameters for simulating data. (artificially reduce the effects of setInd and Type to see if power is working. Are the estimates reasonable)
newparams_rd <- list(
beta = getME(mod.new,"beta"), #fixed effects (intercept, Block, TPPR)
theta = getME(mod.new, "theta"),#Random slope
sigma = getME(mod.new, "sigma")) # residual standard deviation
# Simulate: re.form=~0 simulates new random effects for the simulated IDs
ss <- simulate(~ Type*phase + b2*Type + (0+b1|ID)+(0+b1|ID:Type), nsim = nsim, newdata = expdat_rd, newparams = newparams_rd, family=gaussian, re.form=~0, allow.new.levels=TRUE)
#####################################################################
expdat_rd$TargetRT <- ss[, 1] # first (only) simulated response vector
# Refit the discontinuity model to the simulated data
new1<- lmer(TargetRT ~ Type*phase +b2*Type + (0+b1|ID)+(0+b1|ID:Type), data = expdat_rd, REML = TRUE,control = lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb")))
summary(new1)
summary(new1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: TargetRT ~ Type * phase + b2 * Type + (0 + b1 | ID) + (0 + b1 |
## ID:Type)
## Data: expdat_rd
## Control:
## lmerControl(optimizer = "optimx", calc.derivs = TRUE, optCtrl = list(method = "nlminb"))
##
## REML criterion at convergence: 84156.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2558 -0.6616 0.0060 0.6711 3.3572
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID:Type b1 29.70 5.450
## ID b1 59.31 7.701
## Residual 28303.96 168.238
## Number of obs: 6400, groups: ID:Type, 160; ID, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 609.99291 18.17176 33.57
## TypeAdj_D -67.02826 25.69875 -2.61
## TypeAdj_P -32.95021 25.69875 -1.28
## TypeNon_D -3.76528 25.69875 -0.15
## phase -18.22049 20.29025 -0.90
## b2 -0.57944 2.92864 -0.20
## TypeAdj_D:phase -311.76937 28.52770 -10.93
## TypeAdj_P:phase -235.17603 28.52770 -8.24
## TypeNon_D:phase -190.13028 28.52770 -6.66
## TypeAdj_D:b2 4.32523 4.14173 1.04
## TypeAdj_P:b2 0.95538 4.14173 0.23
## TypeNon_D:b2 -0.06686 4.14173 -0.02
##
## Correlation of Fixed Effects:
## (Intr) TypA_D TypA_P TypN_D phase b2 TyA_D: TyA_P: TyN_D:
## TypeAdj_D -0.707
## TypeAdj_P -0.707 0.500
## TypeNon_D -0.707 0.500 0.500
## phase -0.896 0.633 0.633 0.633
## b2 -0.886 0.627 0.627 0.627 0.794
## TypAdj_D:ph 0.637 -0.901 -0.450 -0.450 -0.703 -0.565
## TypAdj_P:ph 0.637 -0.450 -0.901 -0.450 -0.703 -0.565 0.500
## TypNn_D:phs 0.637 -0.450 -0.450 -0.901 -0.703 -0.565 0.500 0.500
## TypAdj_D:b2 0.627 -0.886 -0.443 -0.443 -0.561 -0.707 0.799 0.399 0.399
## TypAdj_P:b2 0.627 -0.443 -0.886 -0.443 -0.561 -0.707 0.399 0.799 0.399
## TypeNn_D:b2 0.627 -0.443 -0.443 -0.886 -0.561 -0.707 0.399 0.399 0.799
## TA_D:2 TA_P:2
## TypeAdj_D
## TypeAdj_P
## TypeNon_D
## phase
## b2
## TypAdj_D:ph
## TypAdj_P:ph
## TypNn_D:phs
## TypAdj_D:b2
## TypAdj_P:b2 0.500
## TypeNn_D:b2 0.500 0.500
# Check the SEs of the three Type x phase interaction terms (columns 7-9).
# BUGFIX: scalar ifelse(test, print(...), print(...)) both printed AND
# auto-printed the returned value, so the message appeared twice; if/else
# prints it once.
if (any(round(sqrt(diag(vcov(new1)))[7:9], 3) > 30)) {
  print("SE: too large")
} else {
  print("SE: OK")
}
## [1] "SE: OK"
## [1] "SE: OK"